This is an attempt at a streamlined and yet complete, relatively a priori/non- snooped model analysis. Note that this version is an overhaul, with some missing components from the older results: see MixedEffects_older.Rmd …
Some parts of the analysis have been moved into separate .R files: the overall workflow is
source("utils.R")
source("gamm4_utils.R")
Packages used/versions:
load_all_pkgs()
theme_set(theme_bw())
zmargin <- theme(panel.spacing=grid::unit(0,"lines"))
zmarginx <- theme(panel.spacing.x=grid::unit(0,"lines"))
nolegend <- theme(legend.position="none")
library('pander') ## for tables
sapply(pkgList,function(x) format(packageVersion(x)))
## lme4 gamm4 bbmle broom.mixed brms
## "1.1.21.9002" "0.2.5" "1.0.23.1" "0.2.4.9000" "2.11.1"
## data.table lattice gridExtra ggplot2 viridis
## "1.12.8" "0.20.38" "2.3" "3.2.1" "0.5.1"
## plotly cowplot ggstance plyr dplyr
## "4.9.1" "1.0.0" "0.3.3" "1.8.5" "0.8.4"
## tidyr tibble remef r2glmm raster
## "1.0.2" "2.1.3" "1.0.6.9000" "0.1.2" "3.0.12"
## rgdal fields plotrix sp
## "1.4.8" "10.3" "3.7.7" "1.3.2"
comb_out <- function(p,fn,...) {
print(p)
htmlwidgets::saveWidget(ggplotly(p,...),fn)
}
Data from Enric (includes area and lat/long coordinates):
ecoreg <- readRDS("ecoreg.rds")
Limit all observations with all predictor variables > 0 and no biome 98/99; set biome as factor. Log and scale/center variables as appropriate. This leaves us with 26 variables and 620 observations.
This can now be done semi-automatically (i.e., fit all combinations of random effects) by using the function fit_all from the utils.R file (sourced above), e.g. fit_all(response="mbirds_log")
The fit_batch.R code has already run all random-effects model combinations for all four response variables, both with lme4 and with gamm4. sum_batch.R reduces these to lists with components sum (summaries: AIC, singularity, etc.); coef (coefficients); and pred (fitted, residuals, etc.).
Load data: summaries containing
coefs: coefficients for all modelssum: summaries for all models: taxon, model, AIC, singular (singular fit or not?), df (number of parameters), best is this the “best” (min-AIC non-singular fit for each taxon) model?pred: predictions for all modelslme4_res <- readRDS("allfits_sum_lme4.rds") ## 4 taxa x 27 fits, using lmer
gamm4_res <- readRDS("allfits_sum_gamm4.rds") ## 4 taxa x 27 fits, using gamm4
Table of models with AIC<10:
aictab1 <- lme4_res$sum %>%
filter(AIC<8) %>%
arrange(taxon,AIC) %>%
mutate(AIC=round(AIC,1),
model=shorten_modelname(model))
pander(dplyr::select(aictab1,-best),emphasize.strong.rows=which(aictab1$best))
| taxon | model | AIC | singular | df |
|---|---|---|---|---|
| mamph_log | b=i/fr=i/b_FR=d | 0 | TRUE | 20 |
| mamph_log | b=d/fr=i/b_FR=i | 2 | TRUE | 20 |
| mamph_log | b=d/fr=d/b_FR=i | 3.9 | TRUE | 24 |
| mamph_log | b=d/fr=i/b_FR=d | 4 | TRUE | 24 |
| mamph_log | b=i/fr=i/b_FR=f | 5 | TRUE | 30 |
| mamph_log | b=i/fr=d/b_FR=d | 5.1 | TRUE | 24 |
| mbirds_log | b=i/fr=i/b_FR=d | 0 | FALSE | 20 |
| mbirds_log | b=i/fr=d/b_FR=f | 3 | TRUE | 34 |
| mbirds_log | b=i/fr=d/b_FR=d | 4 | TRUE | 24 |
| mbirds_log | b=i/fr=i/b_FR=f | 5.7 | TRUE | 30 |
| mbirds_log | b=d/fr=i/b_FR=d | 7.5 | TRUE | 24 |
| mmamm_log | b=i/fr=d/b_FR=f | 0 | TRUE | 34 |
| mmamm_log | b=i/fr=i/b_FR=f | 4 | TRUE | 30 |
| mmamm_log | b=d/fr=d/b_FR=f | 7.2 | TRUE | 38 |
| plants_log | b=i/fr=i/b_FR=d | 0 | TRUE | 20 |
| plants_log | b=d/fr=i/b_FR=i | 0.2 | TRUE | 20 |
| plants_log | b=i/fr=i/b_FR=f | 1.6 | TRUE | 30 |
| plants_log | b=d/fr=i/b_FR=d | 3.4 | TRUE | 24 |
| plants_log | b=d/fr=i/b_FR=f | 5.1 | TRUE | 34 |
| plants_log | b=d/fr=d/b_FR=i | 6.4 | TRUE | 24 |
| plants_log | b=i/fr=d/b_FR=d | 6.5 | TRUE | 24 |
Best (non-singular) models only:
get_best <- . %>% .[["sum"]] %>% filter(best) %>% dplyr::select(-c(best,singular))
all_best_sum <- purrr::map_dfr(list(lme4=lme4_res,gamm4=gamm4_res), get_best, .id="type")
pander(all_best_sum)
| type | taxon | model | AIC | df |
|---|---|---|---|---|
| lme4 | mbirds_log | biome=int/flor_realms=int/biome_FR=diag | 0 | 20 |
| lme4 | mmamm_log | biome=int/flor_realms=diag/biome_FR=diag | 10.3 | 24 |
| lme4 | mamph_log | biome=int/flor_realms=int/biome_FR=int | 17.25 | 16 |
| gamm4 | mbirds_log | biome=int/flor_realms=int/biome_FR=diag | 0 | 21 |
| gamm4 | plants_log | biome=int/flor_realms=int/biome_FR=diag | 1.81 | 21 |
| gamm4 | mamph_log | biome=int/flor_realms=int/biome_FR=diag | 7.614 | 21 |
| gamm4 | mmamm_log | biome=int/flor_realms=diag/biome_FR=int | 20.16 | 21 |
Our current strategy is to (1) take the best non-singular model fitted by lme4; (2) find the coefficients of the corresponding gamm4 model. (Below, we also try taking the best non-singular gamm4 model.)
get_best_tm <- . %>% get_best() %>% dplyr::select(taxon, model)
lme4_best_models <- lme4_res %>% get_best_tm()
gamm4_best_models <- gamm4_res %>% get_best_tm()
## keep only predictions from best models
gamm4_best_pred <- gamm4_res$pred %>%
right_join(lme4_best_models,by=c("taxon","model"))
Fitted vs residual for all four taxa:
ggplot(gamm4_best_pred,aes(.fitted,.resid))+
geom_point()+geom_smooth()+
facet_wrap(~taxon,nrow=1)+zmargin
A little bit heavy-tailed …
## https://stackoverflow.com/questions/40598011/how-to-customize-hover-information-in-ggplotly-object
ggqq <- ggplot(gamm4_best_pred,aes(sample=.resid,
colour=biome,
flor_realms=flor_realms))+
stat_qq()+stat_qq_line(aes(group=taxon))+
facet_wrap(~taxon,nrow=1)+zmargin
comb_out(ggqq,"ggqq.html",tooltip=c("biome","flor_realms"))
For example (was using plants, now birds because we might not be fitting the models for plants):
ex <- "mbirds_log"
gamm4_allcoef <- get_allcoefs(gamm4_res,focal_taxon=ex) %>% add_wald_ci()
lme4_allcoef <- get_allcoefs(lme4_res,focal_taxon=ex) %>% add_wald_ci()
gg_allcoef <- ggplot(gamm4_allcoef,aes(estimate,model,colour=singular,shape=singular))+
## use point + linerange because some RE are missing std.errors
geom_point()+
geom_linerangeh(aes(xmin=lwr,xmax=upr))+
facet_wrap(~term,scale="free_x")+
geom_vline(xintercept=0,lty=2)+
scale_colour_brewer(palette="Set1")+
zmargin
All gamm4 coefs for mbirds_log:
print(gg_allcoef)
all lme4 coefs for mbirds_log
print(gg_allcoef %+% lme4_allcoef)
to do: why so many singular now … ?
to do: redo profiling/profile plots, comparison of Wald/profile CIs (at least for best models …
Check difference between Wald and likelihood profile confidence intervals. In principle profile CIs are more accurate - if the computations have run reliably … but I would probably be conservative and take the wider of the two.
Pull out coefs from best models for lme4, gamm4 (from lme4-best model), gamm4 (from gamm4-best model)
## lmer-best fits fitted via gamm4
gamm4_best_coef <- gamm4_res$coef %>%
right_join(lme4_best_models,by=c("taxon","model")) %>%
add_wald_ci %>% drop_intercept
## gamm4-best fits
gamm4_best2_coef <- gamm4_res$coef %>%
right_join(gamm4_best_models,by=c("taxon","model")) %>%
add_wald_ci %>% drop_intercept
## lmer-best fits
lme4_best_coef <- lme4_res$coef %>%
right_join(lme4_best_models,by=c("taxon","model")) %>%
add_wald_ci %>% drop_intercept
all_best_coef <- bind_rows(list(lme4=lme4_best_coef,gamm4=gamm4_best_coef,
gamm4_2=gamm4_best2_coef),
.id="type")
to do: plot all coefficients including std devs (need to combine group with term for random effects)
Exclude random effects and NPP_log_sc (which is large and significant for all taxa). Abuse warning: coloring significant effects. Snooping warning: counting number of sig values for each model type!
There are not a lot of effects other than NPP_log_sc that are consistently large and significant; perhaps main effects of fire for birds and mammals. Amphibians have considerably larger effects (the last three: Feat cv and its interactions).
Enric and I decided that it was probably best to go with the gamm4-best models, with some caution expressed about the effects that were marginal. Here are the final(ish?) results, with NPP effects added back in:
Load best non-singular gamm4 fits:
best_models <- readRDS("bestmodels_gamm4.rds")
names(best_models)
## [1] "mamph_log" "mbirds_log" "mmamm_log" "plants_log"
plotfun() takes arguments:
model: fitted modelxvar (“NPP_log_sc”): x-variableauxvar (“Feat_cv_sv”): auxiliary variable (e.g. for examining interactions)respvar (equal to model response by default): response variableaux_quantiles: (0.1, 0.5, 0.9) quantiles of auxiliary variable to predictpred_lower_lim (-3) : lower cut off values (log scale)data (ecoreg)re.form (NA) which RE to include in predictions (default is none)bm <- best_models[[ex]]
p1A <- plotfun(bm)+nolegend
p1B <- plotfun(bm,backtrans=TRUE)+nolegend
p1C <- plotfun(bm,backtrans=TRUE,log="xy")+nolegend
cowplot::plot_grid(p1A,p1B,p1C,labels="auto")
This plot shows (a) observed points and fit on log scale (using “fire eaten” as aux variable, i.e. lines show predictions for 0.1/0.5/0.9 quantiles of fire-eaten); (b) points and back-transformed predictions; (c) the same, but with a constrained scale; (d) back-transformed preds with scaled axes (scale_*_log10()) (scale also constrained)
Partial residuals:
p1D <- plotfun(bm,respvar="partial_res")+nolegend
p1E <- plotfun(bm,respvar="partial_res",backtrans=TRUE)+nolegend
p1F <- plotfun(bm,respvar="partial_res",backtrans=TRUE,log="xy")+nolegend
partial_res_plot <- cowplot::plot_grid(p1D,p1E,p1F,labels="auto")
print(partial_res_plot)
Only a few effects have partial \(R^2\) values of more than a few percent: NPP (of course), fire (for mammals and ?birds?), and fire CV (for amphibians). Everything else is going to be pretty subtle (provided of course that we trust this particular way of estimating \(R^2\)).
all_rsq <- bind_rows(lapply(best_models,
function(x) r2beta(x$mer)),.id="taxon") %>%
mutate(Effect=as.character(Effect),
Effect=gsub("^X","",Effect),
Effect=reorder(factor(Effect),Rsq))
rsqplot <- ggplot(all_rsq,aes(Rsq,Effect,colour=taxon,shape=taxon))+
geom_pointrangeh(aes(xmin=lower.CL,xmax=upper.CL),
position=position_dodgev(height=0.5))+
scale_colour_brewer(palette="Dark2")+
## scale_x_log10(limits=c(1e-2,1),oob=scales::squish)+
labs(y="")
print(rsqplot)
Plot in two panels:
all_rsq$upper <- with(all_rsq,Effect %in% c("Model","NPP_log_sc","log(area_km2)"))
r1 <- rsqplot %+% subset(all_rsq,upper)
r2 <- rsqplot %+% subset(all_rsq,!upper)
## facet_wrap doesn't have 'space' argument ...
## rsqplot %+% all_rsq +facet_wrap(~upper,ncol=1,scales="free")
## facet_grid doesn't use axes for all facets ...
## rsqplot %+% all_rsq +facet_grid(upper~.,scales="free",space="free") +
## theme(strip.text=element_blank())
## https://cran.r-project.org/web/packages/cowplot/vignettes/shared_legends.html
L <- get_legend(r1)
## stack facets
p1 <- plot_grid(r1
+labs(x="")
## rectangle indicating scale of lower box w/in upper
+ geom_rect(xmin=0,xmax=0.125,ymin=-Inf,ymax=Inf,
fill="black",
colour=NA,alpha=0.02)
+ theme(legend.position="none"),
r2+theme(legend.position="none"),
ncol=1,
align="v",rel_heights=c(0.3,0.7),axis="b")
## add legend
plot_grid(p1,L,rel_widths=c(2,0.4))
(from fit_batch.R): diagonal (independent) random effects at the biome and flor realm level. One of the keys here is that the line for each biome is estimated based on the median values of the other predictors (fire, NPP CV, area, etc.) for that particular biome, not the global median …
allfits_restr_gamm4 <- readRDS("allfits_restr_gamm4.rds")
predList <- lapply(allfits_restr_gamm4,
predfun,
auxvar=NULL,grpvar="biome",
re.form=~(1+NPP_log_sc|biome))
set.focal <- function(n,d) { d$focal <- d[[n]]; return(d) }
predList <- Map(set.focal,names(predList),predList)
predFrame <- bind_rows(predList,.id="taxon")
dList <- setNames(replicate(4,ecoreg,simplify=FALSE),names(predList))
dList <- Map(set.focal,names(dList),dList)
dFrame <- bind_rows(dList,.id="taxon")
ggplot(predFrame,aes(x=NPP_log_sc,y=focal,colour=biome))+
geom_line(lwd=1.5)+
geom_point(data=dFrame,aes(shape=flor_realms),alpha=0.7)+
facet_wrap(~taxon)+zmargin+
labs(y="log diversity")+
theme(legend.position="bottom")
Version with Feat_log_sc (main effect of fire) as focal predictor:
predList_fire <- lapply(allfits_restr_gamm4,
predfun,
xvar="Feat_log_sc",
auxvar=NULL,grpvar="biome",
re.form=~(1+Feat_log_sc|biome))
predList_fire <- Map(set.focal,names(predList_fire),predList_fire)
predFrame_fire <- bind_rows(predList_fire,.id="taxon")
(gpbf1 <- ggplot(predFrame_fire,aes(x=Feat_log_sc,y=focal,colour=biome))+
geom_line(lwd=1.5)+
geom_point(data=dFrame,aes(shape=flor_realms),alpha=0.7)+
facet_wrap(~taxon)+zmargin+
labs(y="log diversity")+
theme(legend.position="bottom"))
gpbf1 + geom_ribbon(colour=NA,aes(ymin=lwr,ymax=upr,group=biome),alpha=0.2)
Plot random effects estimates with \(\pm\) 2 SE (these effects do not include the effects of the variation of levels of NPP and fire across biomes/realms; they represent deviations from the expectation based on the population-level effects …)
do.call(plot_grid,ggL)
ff <- filter(tt_all, taxon=="mbirds_log", group=="biome",term=="NPP_log_sc")
gg2A %+% ff
In this particular case the effect of NPP only varies across floristic realms, so the picture isn’t especially pretty:
coefs_plants <- merge_coefs(ecoreg,best_models[[1]])
ggplot(coefs_plants,aes(x,y,colour=NPP_log_sc))+
geom_point()+
scale_colour_viridis()